from src.module.TongyiModel import TongyiModel

from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


class Multimodal:
    """Demo that pushes one text-plus-images message through a LangChain chain.

    The model used by the chain is the ``model`` attribute of the project's
    ``TongyiModel`` wrapper.
    """

    def __init__(self):
        # Keep only the wrapped model object; the wrapper instance is not stored.
        self.model = TongyiModel().model

    def start(self):
        """Invoke the chain once with a fixed question and print the reply.

        The single human message contains one text part followed by two
        image parts that point at the same URL. The parsed string output is
        written to stdout; nothing is returned.
        """
        image_url = 'https://lilianweng.github.io/posts/2023-06-23-agent/agent-overview.png'

        # The text part asks "What do you see?" (kept verbatim in Chinese).
        content_parts = [
            {'text': '你看到了什么'},
            {'image': image_url},
            {'image': image_url},
        ]

        # A 'placeholder' entry is filled with the message list supplied
        # under the "question" key at invoke time.
        template = ChatPromptTemplate.from_messages([('placeholder', '{question}')])

        # prompt -> model -> plain-string parser
        pipeline = template | self.model | StrOutputParser()

        reply = pipeline.invoke({"question": [HumanMessage(content=content_parts)]})
        print(reply)

